## *******************************
##     Figure 2 Descriptives      
##      Hao Zhang & Ye Zhang
##          2025/7/24
## *******************************

library(readstata13)
library(tidyverse)
library(doBy)

# set working directory
setwd("../raw data")

# read in data in the CIES folder
options(scipen = 999)
firm <- read.dta13("cies 1998-2007.dta") %>% 
  mutate(citycode = as.numeric(substr(as.character(city), 0, 4)))

# create ownership types
firm$ownership <- as.numeric(firm$ownership)
firm <- firm %>% mutate(soe = ifelse(ownership == 110 | ownership == 151, 1, 0),
                        hmt = ifelse(ownership > 199 & ownership < 299, 1, 0),
                        fie = ifelse(ownership > 299, 1, 0), 
                        private = ifelse(soe == 0 & hmt == 0 & fie == 0, 1, 0))

# create insurance dummy
firm <- firm %>% filter(insurance >= 0) %>% 
  mutate(dummy1 = ifelse(insurance == 0, 0, 1))

# create total_insurance and rate
firm_base <- firm %>% filter(year >= 2004, year <= 2006) %>% 
  select(citycode, year, frdm, wage) %>% filter(wage > 0) %>%
  mutate(year = year + 1)
firm2004 <- firm %>% filter(year >= 2004) %>%
  filter(insurance >= 0, medical >= 0) %>% 
  mutate(total_insurance = medical) %>%
  mutate(dummy2 = ifelse(total_insurance == 0, 0, 1))

# create discriptives about firms' participation rates
total_rate <- aggregate(firm$dummy1, list(firm$year), mean)
total_rate1 <- aggregate(firm2004$dummy2, list(firm2004$year), mean)

# store results
counts <- matrix(data = NA, nrow = 10, ncol = 2)
counts[,1] <- total_rate$x
counts[7:10,2] <- total_rate1$x
rownames(counts) <- 1998:2007
colnames(counts) <- c("Unemployment Insurance", "Medical Insurance and Pension")
counts <- as.data.frame(counts)

# plot results
pdf("participation_rates_across_year.pdf")
par(mfrow = c(1,1))
par(mar=c(5,5,3,2))
plot(1998:2007, counts$`Unemployment Insurance` , type = "l", col = "black", xlim = c(1998, 2007), ylim = c(0.2, 0.8), xlab = "Year", ylab = "Average Participation Rate",  cex.lab=1.5, cex.axis=1.2)
points(1998:2007, counts$`Unemployment Insurance`, pch = 16, col = "black")
lines(1998:2007, counts$`Medical Insurance and Pension`, col = "blue")
points(1998:2007, counts$`Medical Insurance and Pension`, pch = 17, col = "blue")
legend("topright", legend=c( "Medical Insurance and Pension", "Unemployment Insurance"), lty=1:1, col = c("blue",  "black"), pch = c(17, 16), cex=1.3)
dev.off()

# subset firm types
soe <- subset(firm, subset = soe == 1)
fie <- subset(firm, subset = fie == 1)
private <- subset(firm, subset = soe == 0 & fie == 0)

# aggregate by ownership and year
soe_rate <- aggregate(soe$dummy1, list(soe$year), mean)
fie_rate <- aggregate(fie$dummy1, list(fie$year), mean)
private_rate <- aggregate(private$dummy1, list(private$year), mean)

# store results
counts <- matrix(data = NA, nrow = 10, ncol = 3)
counts[,1] <- soe_rate$x
counts[,2] <- fie_rate$x
counts[,3] <- private_rate$x
rownames(counts) <- 1998:2007
colnames(counts) <- c("SOE", "FIE", "PE")
counts <- as.data.frame(counts)

# plot results
pdf("participation_rates_across_year_and_ownership.pdf")
par(mfrow = c(1,1))
par(mar=c(5,5,3,2))
plot(1998:2007, counts$SOE, type = "l", col = "Red", xlim = c(1998, 2007), ylim = c(0.2, 0.8), xlab = "Year", ylab = "Average Participation Rate in Unemployment Insurance",  cex.lab=1.5, cex.axis=1.2)
points(1998:2007, counts$SOE, pch = 16, col = "Red")
lines(1998:2007, counts$FIE, col = "purple")
points(1998:2007, counts$FIE, pch = 17, col = "purple")
lines(1998:2007, counts$PE, col = "orange")
points(1998:2007, counts$PE, pch = 15, col = "orange")
legend("topright", legend=c( "State-owned Firms", "Foreign Firms", "Private Firms"), lty=1:1, col = c("Red",  "purple", "orange"), pch = c(16, 17, 15), cex=1.2)
dev.off()